** Start from an empty session and disable output paging so the script can run unattended
clear all
set more off
** NOTE(review): with version 12 declared below, memory is presumably managed automatically and this
** setting may be ignored -- confirm before relying on it
set mem 10000000
set matsize 10000
*version 13
** Interpret everything that follows under Stata 12 rules
version 12

*************************************************** 
*** Script to Merge SECC Data to Village Census ***
*************************************************** 

** Set file paths
do "$path_code/paths.do"

**************************************************************
**************************************************************

** NOTE: These files are still huge, even after converting to DTA and compressing aggressively
** This script does four things:
**
**		1) Merge to 2001 PCA at village level (using a pared-down version of our fuzzy merge algorithm)
**		2) Drop state/district/block/village names after merging, to (greatly) reduce file size
**		3) Continue cleaning occupation, also to reduce file size
**		4) Append block-wise dtas to make district-wise dtas


** MISSING DISTRICTS
** Uttarakhand, District: Chamoli (1246 villages)
** Rajasthan, District: Jalor (802 villages)
** West Bengal, District: Jalpaiguri (768 villages)
** Jharkhand, District: Dhanbad (1760 villages)
** Tamil Nadu, Districts: Dindigul (481 villages); Thanjavur (516 villages)


**************************************************************
**************************************************************

** Step 1: Prep census village names for merge with SECC data
{
** Start from the 2001 PCA village names; the household and population counts are dropped here
use "$pca/pca_census01_names.dta", clear
drop no_hh tot_p tot_m tot_f
** Attach the 2001 Village Directory names on 2001 codes, keeping rows found in either source
joinby st_code dt_code vi_code using "$vd01/vd_2001_names.dta", unmatched(both)
drop _merge
** Attach the 2011 PCA names on the same 2001 codes, again keeping unmatched rows from both sides
joinby st_code dt_code vi_code using "$pca/pca_census11_names.dta", unmatched(both)
drop _merge
** NOTE(review): merge m:m pairs rows by position within each key group, so the outcome depends on
** sort order whenever the key is not unique on one side -- confirm this is acceptable for both merges
merge m:m st_code11 dt_code11 vi_code11 using "$vd11/vd_2011_all_raw.dta", nogen keepusing(village_vd11 bk* block*) keep(1 3)
merge m:m conc_id using "$vd11/vd_2011_all_raw.dta", nogen ///
          keepusing(bk_code_conc01 village_conc01 bk_code_conc11 village_conc11 block_2001 block_2011)
** Row identifier for the census-side names; Step 3 merges on it to track which names are still unmatched
gen names_id = _n
** Normalize every name string: collapse repeated blanks, trim both ends, and uppercase
foreach v of varlist state district* block* village* {
	replace `v' = upper(trim(itrim(`v')))
}
	// Re-base the PCA 2011 district, block, and village codes so that the smallest code
	// within each parent unit becomes 1 (the results carry the _secc suffix)
egen min_dt_code11 = min(dt_code11), by(st_code)
gen dt_code11_secc = dt_code11+1-min_dt_code11
	
egen min_bk_code11 = min(bk_code11), by(st_code dt_code11)
gen bk_code11_secc = bk_code11+1-min_bk_code11
gen block11_secc = block11

egen min_vi_code11 = min(vi_code11), by(st_code dt_code11 bk_code11)
gen vi_code11_secc = vi_code11+1-min_vi_code11
** The helper minima and t_p_vd are not needed past this point
drop min_* t_p_vd

	// Mizoram (st_code 15): three re-based block codes are overridden by hand
{
local mizoram "st_code==15"
replace bk_code11_secc = 5 if `mizoram' & dt_code11_secc==4 & bk_code11_secc==6 & block11_secc=="EAST LUNGDAR (PART)"
replace bk_code11_secc = 2 if `mizoram' & dt_code11_secc==5 & bk_code11_secc==6 & block11_secc=="SERCHHIP"
replace bk_code11_secc = 3 if `mizoram' & dt_code11_secc==5 & bk_code11_secc==7 & block11_secc=="EAST LUNGDAR (PART)"
}

	// Assam (st_code 18): many re-based block codes are overridden by hand, listed by district
{
local assam "st_code==18"
	// district 20
replace bk_code11_secc = 4 if `assam' & dt_code11_secc==20 & bk_code11_secc==6 & block11_secc=="BIJNI (PT)"
	// district 21
replace bk_code11_secc = 2 if `assam' & dt_code11_secc==21 & bk_code11_secc==110 & block11_secc=="BENGTOL"
replace bk_code11_secc = 3 if `assam' & dt_code11_secc==21 & bk_code11_secc==111 & block11_secc=="SIDLI (PT)"
replace bk_code11_secc = 4 if `assam' & dt_code11_secc==21 & bk_code11_secc==107 & block11_secc=="BONGAIGAON (PT)"
replace bk_code11_secc = 5 if `assam' & dt_code11_secc==21 & bk_code11_secc==112 & block11_secc=="BIJNI (PT)"
replace bk_code11_secc = 6 if `assam' & dt_code11_secc==21 & bk_code11_secc==15 & block11_secc=="BARNAGAR (PT)"
	// district 25
replace bk_code11_secc = 4 if `assam' & dt_code11_secc==25 & bk_code11_secc==123 & block11_secc=="JALAH (PT)"
replace bk_code11_secc = 5 if `assam' & dt_code11_secc==25 & bk_code11_secc==124 & block11_secc=="GORESWAR (PT)"
replace bk_code11_secc = 6 if `assam' & dt_code11_secc==25 & bk_code11_secc==99 & block11_secc=="RANGIA (PT)"
replace bk_code11_secc = 9 if `assam' & dt_code11_secc==25 & bk_code11_secc==121 & block11_secc=="GHOGRAPAR (PT)"
replace bk_code11_secc = 10 if `assam' & dt_code11_secc==25 & bk_code11_secc==126 & block11_secc=="BASKA"
replace bk_code11_secc = 11 if `assam' & dt_code11_secc==25 & bk_code11_secc==122 & block11_secc=="BAGANPARA (PT)"
replace bk_code11_secc = 12 if `assam' & dt_code11_secc==25 & bk_code11_secc==127 & block11_secc=="TAMULPUR"
replace bk_code11_secc = 13 if `assam' & dt_code11_secc==25 & bk_code11_secc==129 & block11_secc=="PATHORIGHAT (PT)"
	// district 27
replace bk_code11_secc = 2 if `assam' & dt_code11_secc==27 & bk_code11_secc==106 & block11_secc=="PATHORIGHAT (PT)"
replace bk_code11_secc = 3 if `assam' & dt_code11_secc==27 & bk_code11_secc==108 & block11_secc=="MANGALDOI (PT)"
replace bk_code11_secc = 4 if `assam' & dt_code11_secc==27 & bk_code11_secc==110 & block11_secc=="KALAIGAON (PT)"
replace bk_code11_secc = 5 if `assam' & dt_code11_secc==27 & bk_code11_secc==109 & block11_secc=="DALGAON (PT)"
replace bk_code11_secc = 6 if `assam' & dt_code11_secc==27 & bk_code11_secc==111 & block11_secc=="HARISINGA"
replace bk_code11_secc = 7 if `assam' & dt_code11_secc==27 & bk_code11_secc==112 & block11_secc=="UDALGURI"
replace bk_code11_secc = 8 if `assam' & dt_code11_secc==27 & bk_code11_secc==113 & block11_secc=="MAZBAT"
}
	
** Remove exact duplicate rows, report the number of distinct pca01_id values, and drop rows
** that have no state code before saving the village-level names file
duplicates drop
unique pca01_id
drop if st_code==.
compress
save "$secc/names_for_merge.dta", replace

** Block-level companion file: one row per distinct combination of the by() variables, with
** pca01_id replaced by the count of non-missing pca01_id values in that combination
use "$secc/names_for_merge.dta", clear
collapse (count) pca01_id, by(state st_code district dt_code dt_code11 district11 block bk_code_pca bk_code11 bk_code?_vd11 block11 block?_vd11 *secc) fast
compress
save "$secc/names_for_merge_temp_blocks.dta", replace
}

**************************************************************
**************************************************************

** Step 2: Loop through state folders, compress occupation, and remove the state name
** ONLY RERUN IF WE REBUILD FROM SCRATCH (i.e. from the raw zipped folders)

** For every state folder: open each district-level SECC file, verify and drop the state name,
** reduce occupation to a coarse code plus a short free-text remainder, and overwrite the file in place.
forvalues st = 1/36 {
	cd "$secc/secc_dtas/st`st'"
	local st_dtas : dir . files "secc_indiv_rural_st`st'_dt*.dta"
	foreach dta in `st_dtas' {

		use "`dta'", clear
		assert st_code==`st'

		** The SECC state label must agree with the census state name; afterwards neither copy is kept
		rename state state_secc
		merge m:m st_code using "$secc/names_for_merge_temp_blocks.dta", keep(1 3) keepusing(state) nogen
		assert state_secc==substr(state,1,3) | (state_secc=="UP" & state=="UTTAR PRADESH") | (state_secc=="ODI" & state=="ORISSA") | state==""
		drop state state_secc
		duplicates drop

		** Fold common spellings into six canonical occupation strings
		replace occ_cleaned = "dependent" if inlist(occ_cleaned,"son","daughter","child","baby","minor","dep","infant")
		replace occ_cleaned = "worker" if inlist(occ_cleaned,"employee","wages","work")
		replace occ_cleaned = "domestic" if inlist(occ_cleaned,"home","house","household","wife","hwife","hw","homework")
		replace occ_cleaned = "agriculture" if strpos(occ_cleaned,"gricult")>0 | strpos(occ_cleaned,"agr")>0 | occ_cleaned=="agc"
		replace occ_cleaned = "none" if inlist(occ_cleaned,"nil","nill","no","none","nothing","unemployed","unem","unemp")
		replace occ_cleaned = "student" if inlist(occ_cleaned,"std","stud","stu")
		*egen temp_count_occ = count(st_code), by(occ_cleaned)
		*tab occ_cleaned if temp_count>100 
		drop occupation

		** Three-letter category for each canonical string; anything else that is non-empty becomes OTH
		gen occupation = ""
		local canon agriculture domestic student dependent worker none
		local codes AGR DOM STU DEP WRK NIL
		forvalues i = 1/6 {
			local label_i : word `i' of `canon'
			local code_i : word `i' of `codes'
			replace occupation = "`code_i'" if occ_cleaned=="`label_i'"
		}
		replace occupation = "OTH" if occ_cleaned!="" & occupation==""

		** Keep the free text only for the OTH category, truncated to its first 8 characters
		replace occ_cleaned = "" if !inlist(occupation,"","OTH")
		gen other_occup = substr(occ_cleaned,1,8)
		drop occ_cleaned

		** Label, shrink, and overwrite the source file
		label variable occupation "Occupation (AGRiculture,DOMestic,STUdent,DEPendent,WoRKer,NIL,OTHer)"
		label variable other_occup "Other occupation"
		compress
		save "`dta'", replace

	}
}


**************************************************************
**************************************************************

** Step 3: Merge SECC villages to Census villages
** 		   SKIP UTs (4 7 25 26 31 34 35) and Goa (30)

forvalues st = 1/36 {
if inlist(`st',4,7,25,26,30,31,34,35)==0 {

	cd "$secc/secc_dtas_indiv/st`st'"
	** Erase all temp files so the merge starts from scratch
	local dta_files_temp : dir . files "temp_*.dta"
	foreach ftemp in `dta_files_temp' {
		cap erase "`ftemp'"
	}

	** Within state folders, collapse every individual-level file to village counts and stack
	** the results into a single village-wise dta file for merging.
	** The district number is read from the two characters that follow the fixed filename prefix.
	local len = length("secc_indiv_rural_st`st'_dt")+1
	local dta_files1a : dir . files "secc_indiv_rural_st`st'_dt*_0.dta"
	foreach f1a in `dta_files1a' {
		local dt = subinstr(substr("`f1a'",`len',2),"_","",.)
		local dta_files1b : dir . files "secc_indiv_rural_st`st'_dt`dt'_*.dta"
		foreach f1b in `dta_files1b' {
			qui use "`f1b'", clear
			assert length(occupation)<4
			qui collapse (count) tot_p_secc=person_id, by(st_code-village) fast
			qui compress
			** Only the very first file has nothing to append to. Any other append or save
			** failure must stop the script: swallowing it would overwrite the accumulated
			** village file with just the current file and silently lose earlier districts.
			cap confirm file "temp_st`st'_village.dta"
			if _rc==0 {
				qui append using "temp_st`st'_village.dta"
			}
			qui save "temp_st`st'_village.dta", replace
		}
	}
	
	** State code used on the census side: folder 36 is looked up under census state 28
	local st_census = cond(`st'==36, 28, `st')

	** Give every SECC village row an id (row_id) that the merges below carry along
	use "temp_st`st'_village.dta", clear
	if `st'==36 replace st_code = 28
	gen row_id = _n
	qui save "temp_st`st'_village.dta", replace

	** State-specific slice of the census names file
	qui use "$secc/names_for_merge.dta", clear
	qui keep if st_code==`st_census'
	qui compress
	qui save "temp_st`st'_names_for_merge.dta", replace

	** Merge district SECC datasets to PCA 2011

	** Block-level collapse of the SECC villages (one row per block, with its village count),
	** used to speed up the district and block code checks
	use "temp_st`st'_village.dta", clear
	collapse (count) n_vill=tn_code, by(st_code-block)
	gen row_id2 = _n
	qui compress
	qui save "temp_st`st'_block.dta", replace
	
	** Determine which district codes to use for merge
	** The SECC district code is tried against two census codings: the 2001 code (dt_code) and the
	** re-based 2011 code (dt_code11_secc). Each attempt records how many SECC districts found a
	** partner and the mean strdist distance from the SECC district name to the 2001 and 2011 names.
	** Result: locals dt_code_name and district_name, which later steps use for renaming.
	use "temp_st`st'_block.dta", clear
	rename district districtSECC
	qui unique dt_code
	local dt_tot = r(unique) // number of districts that need matching
	
	** Attempt 1 runs inside preserve/restore so attempt 2 starts from the same block file
	preserve
	joinby st_code dt_code using "$secc/names_for_merge_temp_blocks.dta", unmatched(none) // match on 2001 dt_code
	qui unique dt_code
	local dt_match_count_01 = r(unique) // store number of districts that matched
	strdist district districtSECC, gen(temp01)
	qui sum temp01
	local dt_name_dist_01_01 = r(mean) // store distance of string matches to 2001 names
	strdist district11 districtSECC, gen(temp11)
	qui sum temp11
	local dt_name_dist_01_11 = r(mean) // store distance of string matches to 2011 names
	restore

	** Attempt 2: treat the SECC code as the re-based 2011 code
	rename dt_code dt_code11_secc
	joinby st_code dt_code11_secc using "$secc/names_for_merge_temp_blocks.dta", unmatched(none) // match on 2011 dt_code
	qui unique dt_code11_secc
	local dt_match_count_11 = r(unique) // store number of districts that matched
	strdist district districtSECC, gen(temp01)
	qui sum temp01
	local dt_name_dist_11_01 = r(mean) // store distance of string matches to 2001 names
	strdist district11 districtSECC, gen(temp11)
	qui sum temp11
	local dt_name_dist_11_11 = r(mean) // store distance of string matches to 2011 names
	
	** Prefer the 2011 coding when it matches every district; on a tie in name distance the 2011 name wins
	if `dt_match_count_11'==`dt_tot' {
		local dt_code_name = "dt_code11_secc"
		if `dt_name_dist_11_01'<`dt_name_dist_11_11' {
			local district_name = "district"
			di "mean strdist   " `dt_name_dist_11_01'
		} 
		else {
			local district_name = "district11"
			di "mean strdist   " `dt_name_dist_11_11'
		}
	}
	** Otherwise fall back to the 2001 coding; on a tie in name distance the 2001 name wins
	else if `dt_match_count_01'==`dt_tot' {
		local dt_code_name = "dt_code"
		if `dt_name_dist_01_01'<=`dt_name_dist_01_11' {
			local district_name = "district"
			di "mean strdist   " `dt_name_dist_01_01'
		} 
		else {
			local district_name = "district11"
			di "mean strdist   " `dt_name_dist_01_11'
		}
	}
	** Neither coding matched every district: this assert is false in this branch, so the script stops here
	else {
		assert `dt_match_count_11'==`dt_tot'
	}
	** Smallest of the four mean name distances, shown for reference
	di min(`dt_name_dist_01_01',`dt_name_dist_01_11',`dt_name_dist_11_01',`dt_name_dist_11_11')


	** Rename the SECC district code and name to the chosen census variable names in both the
	** block-level and the village-level temp files; the choice is echoed once, for the block file
	foreach lvl in block village {
		use "temp_st`st'_`lvl'.dta", clear
		rename dt_code `dt_code_name'
		rename district `district_name'
		if "`lvl'"=="block" {
			di " `dt_code_name' " "    " " `district_name' "
		}
		qui save "temp_st`st'_`lvl'.dta", replace
	}
	
	
	** Determine which block codes to use for merge
	** For each candidate census block coding in bk_list, the SECC block code is joined to the census
	** block file and two things are stored: the number of matched blocks (bk_match_count<code>) and
	** the mean strdist distance from the SECC block name to each of three census block names
	** (bk_name_dist<code>_pca, bk_name_dist<code>_11_secc, bk_name_dist<code>_2vd11).
*local st = 36
*local dt_code_name = "dt_code11_secc"	
	
	use "temp_st`st'_block.dta", clear
	rename bk_code BK_code_secc
	rename block BLOCK_secc
	qui unique `dt_code_name' BK_code_secc
	local bk_tot = r(unique) // number of blocks that need matching
	if `st'==19 {
		qui unique `dt_code_name' BK_code_secc if BK_code_secc!=0
		local bk_tot = r(unique) // West Bengal has 5 apparently redundant blocks (code 0), excluded from the total
	}
	
*local bk = "11_secc"
*gen bk_code`bk' = BK_code_secc
*joinby st_code `dt_code_name' bk_code`bk' using "$secc/names_for_merge_temp_blocks.dta", unmatched(both) update
*keep if st_code==`st'
	
	** State 28 skips the 2_vd11 coding, so bk_match_count2_vd11 is never defined for it
	local bk_list = "_pca 11_secc 2_vd11"
	if `st'==28 {
		local bk_list = "_pca 11_secc"
	}
	foreach bk in `bk_list' {
		** preserve/restore so every candidate coding starts from the same block file
		preserve
		gen bk_code`bk' = BK_code_secc
		joinby st_code `dt_code_name' bk_code`bk' using "$secc/names_for_merge_temp_blocks.dta", unmatched(none)
		qui unique `dt_code_name' bk_code`bk'
		local bk_match_count`bk' = r(unique) // store number of blocks that matched
		strdist block BLOCK_secc, gen(temp_pca)
		qui sum temp_pca
		local bk_name_dist`bk'_pca = r(mean) // store distance of string matches to 2001 names
		strdist block11 BLOCK_secc, gen(temp11_secc)
		qui sum temp11_secc
		local bk_name_dist`bk'_11_secc = r(mean) // store distance of string matches to 2011 names
		strdist block2_vd11 BLOCK_secc, gen(temp2_vd11)
		qui sum temp2_vd11
		local bk_name_dist`bk'_2vd11 = r(mean) // store distance of string matches to 2011 VD names 2
		restore
	}
	
	** Choose the census block coding (bk_code_name) and block name variable (block_name).
	** Codings are tried in the order 11_secc, _pca, 2_vd11; the first one that matches every SECC
	** block wins. Within the winning coding, the name variable with the smallest mean strdist
	** distance is used, with ties going to the name that belongs to the chosen coding.
	** Locals read here: bk_match_count<code> and bk_name_dist<code>_<name>, where <code> is the
	** coding used for the join and <name> is the census name compared against the SECC block name.
	if `st'!=28 & `st'!=36 { 
		if `bk_match_count11_secc'==`bk_tot' {
			local bk_code_name = "bk_code11_secc"
			if `bk_name_dist11_secc_11_secc'<=min(`bk_name_dist11_secc_pca',`bk_name_dist11_secc_2vd11') {
				local block_name = "block11_secc"
				di "mean strdist   " `bk_name_dist11_secc_11_secc'
			} 
			else if `bk_name_dist11_secc_pca'<=`bk_name_dist11_secc_2vd11' {
				local block_name = "block"
				di "mean strdist   " `bk_name_dist11_secc_pca'
			}
			else {
				local block_name = "block2_vd11"
				di "mean strdist   " `bk_name_dist11_secc_2vd11'
			}
		}
		else if `bk_match_count_pca'==`bk_tot' {
			local bk_code_name = "bk_code_pca"
			if `bk_name_dist_pca_pca'<=min(`bk_name_dist_pca_11_secc',`bk_name_dist_pca_2vd11') {
				local block_name = "block"
				di "mean strdist   " `bk_name_dist_pca_pca'
			} 
			else if `bk_name_dist_pca_11_secc'<=`bk_name_dist_pca_2vd11' {
				local block_name = "block11_secc"
				di "mean strdist   " `bk_name_dist_pca_11_secc'
			}
			else {
				local block_name = "block2_vd11"
				di "mean strdist   " `bk_name_dist_pca_2vd11'
			}
		}
		else if `bk_match_count2_vd11'==`bk_tot' {
			local bk_code_name = "bk_code2_vd11"
			if `bk_name_dist2_vd11_2vd11'<=min(`bk_name_dist2_vd11_pca',`bk_name_dist2_vd11_11_secc') {
				local block_name = "block2_vd11"
				di "mean strdist   " `bk_name_dist2_vd11_2vd11'
			} 
			else if `bk_name_dist2_vd11_pca'<=`bk_name_dist2_vd11_11_secc' {
				local block_name = "block"
				di "mean strdist   " `bk_name_dist2_vd11_pca'
			}
			else {
				local block_name = "block11_secc"		
				di "mean strdist   " `bk_name_dist2_vd11_11_secc'
			}
		}
			// States allowed through with an incomplete block match, provided 11_secc matches at least as many blocks as the others:
			// Manipur has 3 blocks in SENAPATI that are perpetually messed up in all Census datasets
			// Assam block codes are a mess, so we'll hold our nose for now and see how bad it is 
			// Orissa has 12 block codes that don't match, without obvious unmatched counterparts
			// Tamil Nadu has 1 block code that doesn't match because it was split in 2009
		else if (`st'==14 | `st'==18 | `st'==21 | `st'==33) & `bk_match_count11_secc'>=max(`bk_match_count_pca',`bk_match_count2_vd11') {
			local bk_code_name = "bk_code11_secc"
			if `bk_name_dist11_secc_11_secc'<=min(`bk_name_dist11_secc_pca',`bk_name_dist11_secc_2vd11') {
				local block_name = "block11_secc"		
				// report the distance for the coding and name actually chosen
				// (this line previously displayed the 2_vd11-coded distance)
				di "mean strdist   " `bk_name_dist11_secc_11_secc'
			} 
			else if `bk_name_dist11_secc_pca'<=`bk_name_dist11_secc_2vd11' {
				local block_name = "block"
				di "mean strdist   " `bk_name_dist11_secc_pca'
			}
			else {
				local block_name = "block2_vd11"
				di "mean strdist   " `bk_name_dist11_secc_2vd11'
			}
		}
		else {
			// No acceptable coding: show the match counts, then stop (the assert is false in this branch)
			di `bk_match_count_pca' "    " `bk_match_count11_secc' "     " `bk_match_count2_vd11' "     " `bk_tot'
			assert `bk_match_count11_secc'==`bk_tot'
		}
		// Smallest of the nine mean name distances, shown for reference
		di min(`bk_name_dist11_secc_11_secc',`bk_name_dist11_secc_pca',`bk_name_dist11_secc_2vd11',`bk_name_dist_pca_11_secc',`bk_name_dist_pca_pca',`bk_name_dist_pca_2vd11',`bk_name_dist2_vd11_11_secc',`bk_name_dist2_vd11_pca',`bk_name_dist2_vd11_2vd11')
	}
	else {  // block2_vd11 code are wack for Andhra Pradesh (and I'm assuming likewise for Telangana)
		// Only the 11_secc and _pca codings are considered as join keys for states 28 and 36
		if `bk_match_count11_secc'==`bk_tot' {
			local bk_code_name = "bk_code11_secc"
			if `bk_name_dist11_secc_11_secc'<=min(`bk_name_dist11_secc_pca',`bk_name_dist11_secc_2vd11') {
				local block_name = "block11_secc"
				di "mean strdist   " `bk_name_dist11_secc_11_secc'
			} 
			else if `bk_name_dist11_secc_pca'<=`bk_name_dist11_secc_2vd11' {
				local block_name = "block"
				di "mean strdist   " `bk_name_dist11_secc_pca'
			}
			else {
				local block_name = "block2_vd11"
				di "mean strdist   " `bk_name_dist11_secc_2vd11'
			}
		}
		else if `bk_match_count_pca'==`bk_tot' {
			local bk_code_name = "bk_code_pca"
			if `bk_name_dist_pca_pca'<=min(`bk_name_dist_pca_11_secc',`bk_name_dist_pca_2vd11') {
				local block_name = "block"
				di "mean strdist   " `bk_name_dist_pca_pca'
			} 
			else if `bk_name_dist_pca_11_secc'<=`bk_name_dist_pca_2vd11' {
				local block_name = "block11_secc"
				di "mean strdist   " `bk_name_dist_pca_11_secc'
			}
			else {
				local block_name = "block2_vd11"
				di "mean strdist   " `bk_name_dist_pca_2vd11'
			}
		}
		// Incomplete match tolerated when 11_secc matches at least as many blocks as _pca;
		// here only the two non-VD names compete, and a tie goes to the 2001 name
		else if (`st'==28 | `st'==36) & `bk_match_count11_secc'>=`bk_match_count_pca' { 
			local bk_code_name = "bk_code11_secc"
			if `bk_name_dist11_secc_11_secc'<`bk_name_dist11_secc_pca' {
				local block_name = "block11_secc"		
				di "mean strdist   " `bk_name_dist11_secc_11_secc'
			} 
			else {
				local block_name = "block"
				di "mean strdist   " `bk_name_dist11_secc_pca'
			}
		}
		else {
			// No acceptable coding: show the match counts, then stop (the assert is false in this branch)
			di `bk_match_count_pca' "    " `bk_match_count11_secc' "     " `bk_tot'
			assert `bk_match_count11_secc'==`bk_tot'
		}
		// Smallest of the six mean name distances, shown for reference
		di min(`bk_name_dist11_secc_11_secc',`bk_name_dist11_secc_pca',`bk_name_dist11_secc_2vd11',`bk_name_dist_pca_11_secc',`bk_name_dist_pca_pca',`bk_name_dist_pca_2vd11')
	}
	
	** Rename the SECC block name and code in the village file to the chosen census variable names
	local village_file "temp_st`st'_village.dta"
	use "`village_file'", clear
	rename block `block_name'
	rename bk_code `bk_code_name'
	di "`bk_code_name'   `block_name'"
	qui save "`village_file'", replace

	
	** Exact merge on 2011 village code
	** Pass 1 (Mtype "e11c"): join SECC villages to census names on state, district, block, and village
	** code, and accept a pair only when the names also agree closely.
	qui use "temp_st`st'_village.dta", clear
	rename village village_secc
	rename tn_code vi_code11_secc
	joinby st_code `dt_code_name' `bk_code_name' vi_code11_secc using "temp_st`st'_names_for_merge.dta", unmatched(both) _merge(M)	
	qui keep st_code-pca01_id village11 tot_p11 names_id
	** When a SECC row has at least one candidate with a pca01_id, drop its candidates without one
	qui egen temp_min_pca01_id = min(pca01_id), by(row_id)
	qui drop if pca01_id==. & temp_min_pca01_id!=.
	** Name distance is measured twice: against the census name as is, and with its blanks removed
	qui strdist village11 village_secc, gen(vi_name_dist11)
	qui gen village11_nosp = subinstr(village11," ","",.) 
	qui strdist village11_nosp village_secc, gen(vi_name_dist11_nosp)
	** Accept joined pairs with identical names, or with distance below 3 when the SECC name has more than 4 characters
	qui keep if M==3 & (min(vi_name_dist11, vi_name_dist11_nosp)==0 | ///
						(min(vi_name_dist11, vi_name_dist11_nosp)<3 & length(village_secc)>4))
	qui replace vi_name_dist11 = min(vi_name_dist11,vi_name_dist11_nosp)
	qui gen Mtype = "e11c"
	drop vi_name_dist11_nosp village11_nosp temp* M village11
	qui duplicates drop
	qui compress
	save "temp_st`st'_village_matched.dta", replace
	** SECC villages still unmatched: rows of the village file whose row_id is not in the matched file
	qui keep row_id
	qui merge m:1 row_id using "temp_st`st'_village.dta", nogen keep(2)
	qui compress
	qui save "temp_st`st'_village_unmatched.dta", replace
	** Census names still unmatched: rows of the names file whose names_id is not in the matched file
	qui use names_id pca01_id using "temp_st`st'_village_matched.dta", clear
	qui duplicates drop
	qui merge m:1 names_id using "temp_st`st'_names_for_merge.dta", nogen keep(2)
	qui compress
	qui save "temp_st`st'_names_for_merge_unmatched.dta", replace

	** Exact merge on 2011 village name
	** Pass 2 (Mtype "e11n"): join the still-unmatched SECC villages to the still-unmatched census
	** names on state, district, block, and identical village name. Because names can repeat within
	** a block, a pair is accepted only if the village codes are close or the populations are comparable.
	if `st'!=32 {  // Kerala has zero name matches
		qui use "temp_st`st'_village_unmatched.dta", clear
		qui rename village village11
		joinby st_code `dt_code_name' `bk_code_name' village11 using "temp_st`st'_names_for_merge_unmatched.dta", unmatched(both) _merge(M)	
		qui keep row_id st_code-pca01_id vi_code11_secc tot_p11 names_id
		** When a SECC row has at least one candidate with a pca01_id, drop its candidates without one
		qui egen temp_min_pca01_id = min(pca01_id), by(row_id)
		qui drop if pca01_id==. & temp_min_pca01_id!=.
		** Per SECC row, keep only the candidate(s) whose census code is closest to the SECC code
		qui gen vi_code_dist = abs(tn_code-vi_code11_secc)
		qui egen min_dist = min(vi_code_dist), by(row_id)
		qui drop if vi_code_dist>min_dist & min_dist!=.
		qui gen pop_diff_pct = abs(tot_p_secc-tot_p11)/tot_p11
		qui sum vi_code_dist, detail
		local dist_max = r(p95)	
		** The detail option is required: without it summarize does not return r(p25) or r(p75),
		** both bounds come back missing, and inrange() with two missing bounds accepts every
		** non-missing value, which silently switched the population filter off.
		qui sum pop_diff_pct if vi_code_dist<=`dist_max', detail
		local pop_diff_min = r(p25)
		local pop_diff_max = r(p75)
		** NOTE(review): the population test keeps the interquartile range, so pairs with a population
		** gap below the 25th percentile only survive through the code-distance test -- confirm intent
		qui keep if M==3 & (vi_code_dist<=`dist_max' | inrange(pop_diff_pct,`pop_diff_min',`pop_diff_max'))
		qui gen vi_name_dist11 = 0
		qui gen Mtype = "e11n"
		qui drop vi_code11_secc temp* min_dist pop_diff_pct M
		** Restore the variable names used by the pass-1 matched file before appending to it
		rename tn_code vi_code11_secc
		rename village11 village_secc
		qui duplicates drop
		qui compress
		qui append using "temp_st`st'_village_matched.dta"
		save "temp_st`st'_village_matched.dta", replace
		** Rebuild both unmatched files from everything matched so far
		qui keep row_id
		qui merge m:1 row_id using "temp_st`st'_village.dta", nogen keep(2)
		qui keep row_id st_code-tot_p_secc
		qui compress
		qui save "temp_st`st'_village_unmatched.dta", replace
		qui use names_id pca01_id using "temp_st`st'_village_matched.dta", clear
		qui duplicates drop
		qui merge m:1 names_id using "temp_st`st'_names_for_merge.dta", nogen keep(2)
		qui compress
		qui save "temp_st`st'_names_for_merge_unmatched.dta", replace	
	}
	
	** Masala Merge on 6 village names (within block)
	** Pass 3: fuzzy-match the remaining SECC village names against each of six census name
	** variables in turn, restricted to the same state-district-block group (stdtbk).
	qui do "$path_code/merge/masala_merge_lp_server.do"
	cap mkdir "tmp"	
	use "temp_st`st'_names_for_merge_unmatched.dta", clear
	** Group keys as strings: state-district-block and state-district
	qui gen stdtbk = string(st_code) + " " + string(`dt_code_name') + " " + string(`bk_code_name')
	qui gen stdt = string(st_code) + " " + string(`dt_code_name')
	** Parentheses are swapped for braces in every name variable
	** (presumably masala_merge2 cannot handle parentheses -- TODO confirm)
	foreach v of varlist village* {
		qui replace `v' = subinstr(`v',"(","{",.)
		qui replace `v' = subinstr(`v',")","}",.)
	}
	rename village village_pca
	** Blank out a name that repeats an earlier name variable on the same row, so the same string
	** is not matched again under a later variable
	qui replace village_vd = "--DUPLICATE--" if (village_vd==village_pca) & village_vd!=""
	qui replace village11 = "--DUPLICATE--" if (village11==village_vd | village11==village_pca) & village11!=""
	qui replace village_conc01 = "--DUPLICATE--" if (village_conc01==village11 | village_conc01==village_vd | village_conc01==village_pca) & village_conc01!=""
	qui replace village_conc11 = "--DUPLICATE--" if (village_conc11==village_conc01 | village_conc11==village11 | village_conc11==village_vd | village_conc11==village_pca) & village_conc11!=""
	qui replace village_vd11 = "--DUPLICATE--" if (village_vd11==village_conc11 | village_vd11==village_conc01 | village_vd11==village11 | village_vd11==village_vd | village_vd11==village_pca) & village_vd11!=""
	qui compress
	qui save "temp_st`st'_names_for_merge_unmatchedMM.dta", replace
	
	** Start the accumulated output from scratch; the first append below then fails and is ignored
	cap erase "temp_st`st'_MMout.dta"
	foreach vi in village_pca village11 village_vd village_conc01 village_conc11 village_vd11 {
		use "temp_st`st'_village_unmatched.dta", clear
		qui gen stdtbk = string(st_code) + " " + string(`dt_code_name') + " " + string(`bk_code_name')
		qui gen stdt = string(st_code) + " " + string(`dt_code_name')
		qui replace village = subinstr(village,"(","{",.)
		qui replace village = subinstr(village,")","}",.)
		** The SECC name takes the name of the census variable it is being matched against
		rename village `vi'
		** Everything from here on is captured: if the fuzzy merge fails, the unmerged rows are still
		** appended and are filtered out when the results are processed
		cap masala_merge2 stdtbk using "temp_st`st'_names_for_merge_unmatchedMM.dta", s1(`vi') outfile(temp_outMM_`vi') dist(5) quietly
		cap keep if _masala_merge==3
		cap rename `vi' village
		cap append using "temp_st`st'_MMout.dta"
		cap save "temp_st`st'_MMout.dta", replace
	}	
	
	** Process Masala Merge results (within block)
	** Every command is captured so that a state with no usable fuzzy-merge output falls through
	** without stopping the loop.
	cap use "temp_st`st'_MMout.dta", clear
	** Placeholders: each gen only succeeds when the variable is absent from the merge output
	cap gen _masala_merge = .
	cap gen lev_dist = .
	cap gen pca01_id = .
	** Drop rows that carry no merge information at all
	cap drop if row_id==. | (_masala_merge==. & lev_dist==. & pca01_id==.)
	cap duplicates drop row_id pca01_id names_id, force
	** Among repeated (names_id, row_id) pairs keep the smallest lev_dist
	cap duplicates t names_id row_id, gen(dup1)
	cap egen min_dist1 = min(lev_dist), by(names_id row_id)
	cap drop if lev_dist>min_dist1 & dup1>0
	** Same rule for repeated (pca01_id, row_id) pairs
	cap duplicates t pca01_id row_id, gen(dup2)
	cap egen min_dist2 = min(lev_dist), by(pca01_id row_id)
	cap drop if lev_dist>min_dist2 & dup2>0
	*cap gen villusing = ""
	*foreach v of varlist *_using {
	*	replace villusing = `v' if villusing==""
	*}
	** For a SECC row with several candidates, drop those without a pca01_id when another has one
	cap duplicates t row_id, gen(dup3)
	cap egen mean_pca01_id = mean(pca01_id), by(row_id)
	cap drop if pca01_id==. & mean_pca01_id!=. & dup3>0
	** Still several candidates: drop those farther in village code than the closest one,
	** but only when the closest one is within 10 codes
	cap duplicates t row_id, gen(dup4)
	cap gen vi_code_dist = abs(tn_code - vi_code11_secc)
	cap egen min_vi_code_dist = min(vi_code_dist), by(row_id)
	cap drop if dup4>0 & min_vi_code_dist<vi_code_dist & min_vi_code_dist<10
	** Reduce to the match keys and diagnostics, tag the pass, and re-attach the SECC village fields
	cap keep row_id pca01_id names_id tot_p11 vi_code_dist lev_dist
	cap order row_id pca01_id names_id tot_p11 vi_code_dist lev_dist
	cap gen Mtype = "MMbk"
	cap duplicates drop 
	cap compress
	cap merge m:1 row_id using "temp_st`st'_village_unmatched.dta", nogen keep(3)
	cap save "temp_st`st'_MMmatched.dta", replace
	
	** Add the within-block fuzzy matches to the matched file (the append is captured, so a missing
	** MMmatched file is tolerated), then rebuild both unmatched files from everything matched so far
	qui use "temp_st`st'_village_matched.dta", clear
	cap append using "temp_st`st'_MMmatched.dta"
	qui save "temp_st`st'_village_matched.dta", replace
	** SECC villages whose row_id is not yet in the matched file
	qui keep row_id
	qui merge m:1 row_id using "temp_st`st'_village.dta", nogen keep(2)
	qui keep row_id st_code-tot_p_secc
	qui compress
	qui save "temp_st`st'_village_unmatched.dta", replace
	** Census names whose names_id is not yet in the matched file
	qui use names_id pca01_id using "temp_st`st'_village_matched.dta", clear
	qui duplicates drop
	qui merge m:1 names_id using "temp_st`st'_names_for_merge.dta", nogen keep(2)
	qui compress
	qui save "temp_st`st'_names_for_merge_unmatched.dta", replace
	

	** Masala Merge on 6 village names (not restricting block)
	** Pass 4: same fuzzy match as the within-block pass, but grouped only by state-district (stdt),
	** so a SECC village may pair with a census village in a different block of the same district.
	qui use "temp_st`st'_names_for_merge_unmatched.dta", clear
	cap gen stdtbk = string(st_code) + " " + string(`dt_code_name') + " " + string(`bk_code_name')
	cap gen stdt = string(st_code) + " " + string(`dt_code_name')
	** Parentheses are swapped for braces in every name variable
	** (presumably masala_merge2 cannot handle parentheses -- TODO confirm)
	foreach v of varlist village* {
		qui replace `v' = subinstr(`v',"(","{",.)
		qui replace `v' = subinstr(`v',")","}",.)
	}
	qui rename village village_pca
	** Blank out a name that repeats an earlier name variable on the same row
	qui replace village_vd = "--DUPLICATE--" if (village_vd==village_pca) & village_vd!=""
	qui replace village11 = "--DUPLICATE--" if (village11==village_vd | village11==village_pca) & village11!=""
	qui replace village_conc01 = "--DUPLICATE--" if (village_conc01==village11 | village_conc01==village_vd | village_conc01==village_pca) & village_conc01!=""
	qui replace village_conc11 = "--DUPLICATE--" if (village_conc11==village_conc01 | village_conc11==village11 | village_conc11==village_vd | village_conc11==village_pca) & village_conc11!=""
	qui replace village_vd11 = "--DUPLICATE--" if (village_vd11==village_conc11 | village_vd11==village_conc01 | village_vd11==village11 | village_vd11==village_vd | village_vd11==village_pca) & village_vd11!=""
	qui compress
	qui save "temp_st`st'_names_for_merge_unmatchedMM_nobk.dta", replace
	** Start the accumulated output from scratch; the first append below then fails and is ignored
	cap erase "temp_st`st'_MMout_nobk.dta"
	foreach vi in village_pca village11 village_vd village_conc01 village_conc11 village_vd11 {
		use "temp_st`st'_village_unmatched.dta", clear
		cap gen stdtbk = string(st_code) + " " + string(`dt_code_name') + " " + string(`bk_code_name')
		cap gen stdt = string(st_code) + " " + string(`dt_code_name')
		** The SECC-side block key is renamed so it stays distinct from the census-side stdtbk
		cap rename stdtbk stdtbkM
		cap replace village = subinstr(village,"(","{",.)
		cap replace village = subinstr(village,")","}",.)
		cap rename village `vi'
		** Captured throughout: a failed fuzzy merge leaves unmerged rows that are filtered out later
		cap masala_merge2 stdt using "temp_st`st'_names_for_merge_unmatchedMM_nobk.dta", s1(`vi') outfile(temp_outMM_`vi'_nobk) dist(5) quietly
		cap keep if _masala_merge==3
		cap rename `vi' village
		cap append using "temp_st`st'_MMout_nobk.dta"
		cap save "temp_st`st'_MMout_nobk.dta", replace
	}	
	
	** Process Masala Merge results (not restricting block)
	** Same de-duplication as the within-block pass, plus bk_group_count: the number of distinct
	** SECC rows matched between each pair of SECC block (stdtbkM) and census block (stdtbk).
	** Every command is captured so that a state with no usable output falls through.
	cap use "temp_st`st'_MMout_nobk.dta", clear
	** Placeholders: each gen only succeeds when the variable is absent from the merge output
	cap gen _masala_merge = .
	cap gen lev_dist = .
	cap gen pca01_id = .
	cap drop if row_id==. | (_masala_merge==. & lev_dist==. & pca01_id==.)
	cap duplicates drop row_id pca01_id names_id, force
	** Among repeated (names_id, row_id) pairs keep the smallest lev_dist
	cap duplicates t names_id row_id, gen(dup1)
	cap egen min_dist1 = min(lev_dist), by(names_id row_id)
	cap drop if lev_dist>min_dist1 & dup1>0
	** Same rule for repeated (pca01_id, row_id) pairs
	cap duplicates t pca01_id row_id, gen(dup2)
	cap egen min_dist2 = min(lev_dist), by(pca01_id row_id)
	cap drop if lev_dist>min_dist2 & dup2>0
	*cap gen villusing = ""
	*foreach v of varlist *_using {
	*	replace villusing = `v' if villusing==""
	*}
	** For a SECC row with several candidates, drop those without a pca01_id when another has one
	cap duplicates t row_id, gen(dup3)
	cap egen mean_pca01_id = mean(pca01_id), by(row_id)
	cap drop if pca01_id==. & mean_pca01_id!=. & dup3>0
	** Still several candidates: drop those farther in village code than the closest one,
	** but only when the closest one is within 10 codes
	cap duplicates t row_id, gen(dup4)
	cap gen vi_code_dist = abs(tn_code - vi_code11_secc)
	cap egen min_vi_code_dist = min(vi_code_dist), by(row_id)
	cap drop if dup4>0 & min_vi_code_dist<vi_code_dist & min_vi_code_dist<10
	** The keep list used to name bk_group_count, which does not exist until further down, so the
	** captured keep always failed and every working variable (including the census-side
	** vi_code11_secc) leaked into the saved file. Keep the match keys, the diagnostics, and the two
	** block keys that bk_group_count is computed from.
	** NOTE(review): stdtbkM and stdtbk are left in the output in case later steps outside this
	** view use them -- confirm, and drop them here if not
	cap keep row_id pca01_id names_id tot_p11 vi_code_dist lev_dist stdtbkM stdtbk
	cap order row_id pca01_id names_id tot_p11 vi_code_dist lev_dist
	cap gen Mtype = "MMnobk"
	cap duplicates drop 
	** Each SECC row contributes a total weight of 1, split evenly across its remaining candidates
	cap duplicates t row_id, gen(dup5)
	cap gen row_id_count = 1/(dup5+1)
	cap egen bk_group_count = sum(row_id_count), by(stdtbkM stdtbk)
	cap drop dup5 row_id_count
	cap compress
	** Re-attach the SECC village fields
	cap merge m:1 row_id using "temp_st`st'_village_unmatched.dta", nogen keep(3)
	cap save "temp_st`st'_MMmatched_nobk.dta", replace

	** Add the no-block fuzzy matches to the matched file (the append is captured, so a missing
	** MMmatched_nobk file is tolerated), then rebuild both unmatched files one last time
	qui use "temp_st`st'_village_matched.dta", clear
	cap append using "temp_st`st'_MMmatched_nobk.dta"
	qui save "temp_st`st'_village_matched.dta", replace
	** SECC villages whose row_id is not in the matched file
	qui keep row_id
	qui merge m:1 row_id using "temp_st`st'_village.dta", nogen keep(2)
	qui keep row_id st_code-tot_p_secc
	qui compress
	qui save "temp_st`st'_village_unmatched.dta", replace
	** Census names whose names_id is not in the matched file
	qui use names_id pca01_id using "temp_st`st'_village_matched.dta", clear
	qui duplicates drop
	qui merge m:1 names_id using "temp_st`st'_names_for_merge.dta", nogen keep(2)
	qui compress
	qui save "temp_st`st'_names_for_merge_unmatched.dta", replace
	
	
	
	** Remove the Masala Merge scratch directory: first through the operating-system shell,
	** then with the built-in command in case anything is left
	shell rmdir "tmp" /s /q
	cap rmdir "tmp"

	** Count distinct matched SECC villages and tabulate them by match type
	qui use "temp_st`st'_village_matched.dta", clear
	qui unique row_id
	local n_matched = r(unique)
	tabulate Mtype

	** Count all SECC villages in the state
	qui use "temp_st`st'_village.dta", clear
	qui unique row_id
	local n_total = r(unique)

	local bar "*****************************************************"
	display "`bar'"
	display "State `st' done, `n_matched' out of `n_total' matched"
	display "`bar'"

}

else {
	** States on the skip list only get a banner in the log
	local bar "*****************************************************"
	display "`bar'"
	display "State `st' skipped, because it's either Goa or a UT! "
	display "`bar'"
}
}

 
**************************************************************
**************************************************************

** Step 4: Step down all matches, create PCA-SECC crosswalks for each state
** 		   SKIP UTs (4 7 25 26 31 34 35) and Goa (30)

// Step 4 driver: for every state code 1-36 except the UTs and Goa listed in the
// inlist() below, load that state's pooled candidate matches
// (temp_st`st'_village_matched.dta), flag the candidates to retain in `keep`
// using progressively looser rules (exact matches, then MMbk, then MMnobk),
// and save a crosswalk with one row per row_id as temp_st`st'_village_pca_xwalk.dta.
// Most housekeeping commands are prefixed with `cap`, so any error they raise
// is silently ignored and the script continues.
forvalues st = 1/36 {
if inlist(`st',4,7,25,26,30,31,34,35)==0 {

	qui cd "$secc/secc_dtas_indiv/st`st'"
	qui use "temp_st`st'_village_matched.dta", clear
	// collapse repeated (row_id, names_id) pairs, then count how many OTHER
	// observations share each row_id / names_id / pca01_id (0 = unique)
	cap duplicates drop row_id names_id, force
	cap duplicates t row_id, gen(dup_row_id)
	cap duplicates t names_id, gen(dup_names_id)
	cap duplicates t pca01_id, gen(dup_pca01_id)
	// fall back to tn_code where vi_code11_secc is missing, then discard tn_code
	cap replace vi_code11_secc = tn_code if vi_code11_secc==.
	cap drop tn_code
	
	// start by keeping exact matches with no dupes
	qui gen keep = 0
	qui replace keep = 1 if inlist(Mtype,"e11c","e11n") & dup_row_id==0 & dup_names_id==0 
	// for many dupes the same SECC village is mistakenly split across multiple row_ids
	// dup_tncode1 / dup_tncode2 > 0 means another observation has the same
	// vi_code11_secc together with the same names_id / pca01_id respectively
	qui duplicates t vi_code11_secc names_id, gen(dup_tncode1)
	qui replace keep = 1 if inlist(Mtype,"e11c","e11n") & keep==0 & dup_tncode1>0
	qui duplicates t vi_code11_secc pca01_id, gen(dup_tncode2)
	qui replace keep = 1 if inlist(Mtype,"e11c","e11n") & keep==0 & dup_tncode2>0
	// NOTE(review): moving gr_code and vi_code11_secc to the front presumably
	// takes them out of the st_code-village_secc range used as the by-group
	// on the next line -- confirm against the variable order of the matched file
	qui order gr_code vi_code11_secc
	// most frequent pca01_id within each by-group; the rule below only fires
	// when that mode is non-missing and equals the observation's own pca01_id
	qui egen mode_pca01_id = mode(pca01_id), by(st_code-village_secc)
	qui replace keep = 1 if inlist(Mtype,"e11c","e11n") & keep==0 & pca01_id==mode_pca01_id & mode_pca01_id!=.
	
	// next move to MM matches within block
	// same sequence of rules as above, restricted to Mtype "MMbk" and to
	// observations with non-missing names_id and lev_dist
	cap replace keep = 1 if inlist(Mtype,"MMbk") & keep==0 & dup_row_id==0 & dup_names_id==0 & names_id!=. & lev_dist!=.
	// among still-unkept MMbk candidates for the same pca01_id, keep the one(s)
	// with the smallest lev_dist, but only if the candidates differ in lev_dist
	// (min strictly below max)
	cap egen max_lev_dist = max(lev_dist) if keep==0 & inlist(Mtype,"MMbk"), by(pca01_id)
	cap egen min_lev_dist = min(lev_dist) if keep==0 & inlist(Mtype,"MMbk"), by(pca01_id)
	cap replace keep = 1 if inlist(Mtype,"MMbk") & keep==0 & lev_dist==min_lev_dist & min_lev_dist<max_lev_dist & names_id!=. & lev_dist!=.
	cap replace keep = 1 if inlist(Mtype,"MMbk") & keep==0 & dup_tncode1>0 & names_id!=. & lev_dist!=.
	cap replace keep = 1 if inlist(Mtype,"MMbk") & keep==0 & dup_tncode2>0 & names_id!=. & lev_dist!=.
	cap replace keep = 1 if inlist(Mtype,"MMbk") & keep==0 & pca01_id==mode_pca01_id & mode_pca01_id!=. & names_id!=. & lev_dist!=.

	// finally, carefully consider MM matches that don't restrict block
	// identical rules for Mtype "MMnobk", with the extra requirement
	// bk_group_count>=5 on every rule
	cap replace keep = 1 if inlist(Mtype,"MMnobk") & keep==0 & dup_row_id==0 & dup_names_id==0 & bk_group_count>=5 & names_id!=. & lev_dist!=.
	cap egen max_lev_dist2 = max(lev_dist) if keep==0 & inlist(Mtype,"MMnobk"), by(pca01_id)
	cap egen min_lev_dist2 = min(lev_dist) if keep==0 & inlist(Mtype,"MMnobk"), by(pca01_id)
	cap replace keep = 1 if inlist(Mtype,"MMnobk") & keep==0 & lev_dist==min_lev_dist2 & min_lev_dist2<max_lev_dist2 & bk_group_count>=5 & names_id!=. & lev_dist!=.
	cap replace keep = 1 if inlist(Mtype,"MMnobk") & keep==0 & dup_tncode1>0 & bk_group_count>=5 & names_id!=. & lev_dist!=. 
	cap replace keep = 1 if inlist(Mtype,"MMnobk") & keep==0 & dup_tncode2>0 & bk_group_count>=5 & names_id!=. & lev_dist!=.
	cap replace keep = 1 if inlist(Mtype,"MMnobk") & keep==0 & pca01_id==mode_pca01_id & mode_pca01_id!=. & bk_group_count>=5 & names_id!=. & lev_dist!=.
	
	// retain only the flagged candidates
	cap keep if keep==1
	cap drop keep
	// dup_row_id2 is generated here but is not among the variables retained
	// by the `keep` two lines below
	qui duplicates t row_id, gen(dup_row_id2)
	// use the `village` string wherever village_secc is empty
	qui replace village_secc = village if village_secc=="" & village!=""
	qui keep row_id pca01_id village_secc tot_p_secc
	qui duplicates drop row_id pca01_id, force
	// any row_id still appearing more than once at this point is dropped
	// entirely, leaving at most one pca01_id per row_id
	cap duplicates t row_id, gen(dup)
	cap drop if dup>0
	cap drop dup

	qui compress
	qui save "temp_st`st'_village_pca_xwalk.dta", replace
	
	// sanity check: every remaining observation must have a distinct row_id
	// (`unique` is not a built-in command; presumably the user-written SSC
	// package that returns r(unique) -- confirm it is installed)
	qui unique row_id
	qui local n_row_id = r(unique)
	assert `n_row_id'==_N
	qui unique pca01_id
	qui local n_pca01_id = r(unique)

	di "**************************************************************************"
	di "State `st' : `n_row_id' SECC villages matched to `n_pca01_id' PCA villages"
	di "**************************************************************************"

}
}
	
	
**************************************************************
**************************************************************

** Step 5: Merge PCA01_ID into SECC data, save district-level datasets
** 		     For UTs (4 7 25 26 31 34 35) and Goa (30), skip the merge but still save district-level datasets
	
// Step 5 driver: loop over state codes 1-36.
//   * For states other than the UTs/Goa listed in inlist(): merge the state's
//     PCA crosswalk into the village names file, save it as
//     temp_st`st'_village_pca_merged.dta, then merge that into every
//     block-level person file and accumulate the blocks into one
//     district-level file secc_pca_indiv_rural_st`st'_dt`dt'.dta.
//   * For the UTs/Goa: skip the merge and only stack the block files into
//     district-level files.
// Each block file is erased once it has been folded into its district file.
// Because of that erase, the district append is NOT wrapped in `capture`:
// the file's existence is tested explicitly, and any genuine append error
// (e.g. a string/numeric type mismatch between blocks) stops the script
// before `save, replace` could overwrite previously accumulated blocks.
forvalues st = 1/36 {
if inlist(`st',4,7,25,26,30,31,34,35)==0 {
	
	// merge xwalk back into village names dataset
	qui cd "$secc/secc_dtas_indiv/st`st'"
	qui use "temp_st`st'_village_pca_xwalk.dta", clear
	qui rename tot_p_secc tot_p_secc_test
	qui merge 1:1 row_id using "temp_st`st'_village.dta"
	// every crosswalk row must exist in the village file, with the same
	// population and (up to {}/() bracket style) the same village name
	assert _merge!=1
	assert tot_p_secc_test==tot_p_secc if _merge==3
	assert subinstr(subinstr(village_secc,"{","(",.),"}",")",.)==subinstr(subinstr(village,"{","(",.),"}",")",.) if _merge==3
	qui drop _merge tot_p_secc tot_p_secc_test village_secc
	
	// rename block and district vars back to pre-merge names
	// (as written, each rename maps a variable onto its own name)
	qui rename dt_code dt_code
	qui rename district district
	qui rename bk_code bk_code
	qui rename block block
	if `st'==36 {
		qui replace st_code = 36
	}
	
	// save final crosswalk of matched villages
	qui duplicates drop
	qui compress
	qui save "temp_st`st'_village_pca_merged.dta", replace

	// loop over person-specific SECC dta files 
	// outer loop: one "_0" file per district identifies the district code;
	// inner loop: every block file of that district
	local dta_files2a : dir . files "secc_indiv_rural_st`st'_dt*_0.dta"
	foreach f2a in `dta_files2a' {
		// district code = up to two characters following "..._dt" in the filename
		local len = length("secc_indiv_rural_st`st'_dt")+1
		local dt = subinstr(substr("`f2a'",`len',2),"_","",.)
		local dta_files2b : dir . files "secc_indiv_rural_st`st'_dt`dt'_*.dta"
		foreach f2b in `dta_files2b' {

			// merge with PCA xwalk, on full person-specific SECC data files 
			qui use "`f2b'", clear
			// district code and first/last block code as recorded in the data
			local dt = dt_code[1]
			local bk1 = bk_code[1]
			local bk2 = bk_code[_N]
			cap gen gr_code = .  // 1 block in Rajasthan (at least) is missing gr_code
			order gr_code, before(tn_code)
			qui merge m:1 st_code-tn_code using "temp_st`st'_village_pca_merged.dta"
			// every person row must find its village, and matched villages
			// must be exactly those of this district and block(s)
			assert _merge!=1
			assert _merge==3 if dt_code==`dt' & (bk_code==`bk1' | bk_code==`bk2')
			assert dt_code==`dt' & (bk_code==`bk1' | bk_code==`bk2') if _merge==3
			qui drop if _merge==2
		
			// drop strings that are now unnecessary
			qui drop district block village row_id _merge
			qui order pca01_id
			qui duplicates drop
			qui compress
		
			// save as new district-specific file, including PCA01_id!
			// append to the district file only if it already exists; a real
			// append failure must halt here rather than be swallowed
			local dt_file "secc_pca_indiv_rural_st`st'_dt`dt'.dta"
			capture confirm file "`dt_file'"
			if _rc==0 {
				qui append using "`dt_file'"
			}
			qui duplicates drop
			qui save "`dt_file'", replace
			
			// erase block file (now redundant)
			erase "`f2b'"
		}
	}
}

else {

	// loop over person-specific SECC dta files 
	qui cd "$secc/secc_dtas_indiv/st`st'"
	local dta_files2a : dir . files "secc_indiv_rural_st`st'_dt*_0.dta"
	foreach f2a in `dta_files2a' {
		// district code = up to two characters following "..._dt" in the filename
		local len = length("secc_indiv_rural_st`st'_dt")+1
		local dt = subinstr(substr("`f2a'",`len',2),"_","",.)
		local dta_files2b : dir . files "secc_indiv_rural_st`st'_dt`dt'_*.dta"
		foreach f2b in `dta_files2b' {

			// load un-merged-state files
			qui use "`f2b'", clear
			cap gen gr_code = .  // 1 block in Rajasthan (at least) is missing gr_code
			order gr_code, before(tn_code)
				
			// save as new district-specific file
			// append to the district file only if it already exists; a real
			// append failure must halt here rather than be swallowed
			local dt_file "secc_pca_indiv_rural_st`st'_dt`dt'.dta"
			capture confirm file "`dt_file'"
			if _rc==0 {
				qui append using "`dt_file'"
			}
			qui duplicates drop
			qui compress
			qui save "`dt_file'", replace
			
			// erase block file (now redundant)
			erase "`f2b'"
		}
	}
}

di "State `st' okay!"

}
	
**************************************************************
**************************************************************

** Step 6: List any block-specific files that still exist (nothing is printed if Step 5 erased them all)

// Visit each state folder in turn and print the name of every block-level
// person file still present there; a clean run prints nothing.
forvalues state = 1(1)36 {
	quietly cd "$secc/secc_dtas_indiv/st`state'"
	local leftover_blocks : dir . files "secc_indiv_rural_*.dta"
	foreach blockfile of local leftover_blocks {
		display "`blockfile'"
	}
}

**************************************************************
**************************************************************

** Step 7: Erase all temp files

// Visit each state folder in turn and delete every temp_*.dta file in it.
// Erase failures are ignored (capture), so one undeletable file does not
// stop the cleanup.
forvalues state = 1(1)36 {
	cd "$secc/secc_dtas_indiv/st`state'"
	local scratch_files : dir . files "temp_*.dta"
	foreach scratch of local scratch_files {
		capture erase "`scratch'"
	}
}

**************************************************************
**************************************************************

** Step 8: Go back and re-clean variables, just in case any didn't get cleaned properly 
// Step 8 driver: for every state except code 4, reopen each district-level
// file secc_pca_indiv_rural_st`st'_dt*.dta, map any remaining numeric-string
// codes of the categorical variables to their letter codes, blank out (or,
// for pub_priv, set to "NA") anything that is not a recognised code, recode
// the yes/no style variables so that 2 becomes 0, and save the file in place.
forvalues st = 1/36 {
if `st'!=4 {

	cd "$secc/secc_dtas_indiv/st`st'"
	local dta_files_dt : dir . files "secc_pca_indiv_rural_st`st'_dt*.dta"
	foreach f in `dta_files_dt' {
		
		use "`f'", clear
		
		// each group below: digit-string -> letter code, then blank anything
		// that is still not one of the accepted letter codes
		replace gender = "M" if gender=="1"
		replace gender = "F" if gender=="2"
		replace gender = "" if inlist(gender,"M","F")==0

		replace marital = "M" if marital=="2"
		replace marital = "D" if marital=="5"
		replace marital = "N" if marital=="1"
		replace marital = "S" if marital=="4"
		replace marital = "W" if marital=="3"
		replace marital = "" if inlist(marital,"M","D","N","S","W")==0

		replace edu_level = "IL" if edu_level=="1"
		replace edu_level = "<P" if edu_level=="2"
		replace edu_level = "PR" if edu_level=="3"
		replace edu_level = "MI" if edu_level=="4"
		replace edu_level = "SE" if edu_level=="5"
		replace edu_level = "HS" if edu_level=="6"
		replace edu_level = "GR" if edu_level=="7"
		replace edu_level = "OT" if edu_level=="8"
		replace edu_level = "" if inlist(edu_level,"IL","<P","PR","MI","SE","HS","GR","OT")==0

		replace caste_group = "SC" if caste_group=="1"
		replace caste_group = "ST" if caste_group=="2"
		replace caste_group = "OT" if caste_group=="3"
		replace caste_group = "" if caste_group=="4"
		replace caste_group = "" if inlist(caste_group,"SC","ST","OT")==0
		
		// wall/roof material and income source use 3-letter codes, so any
		// value whose length is not 3 is treated as invalid
		replace wall_mat = "OTH" if wall_mat=="0"
		replace wall_mat = "BBK" if wall_mat=="8"
		replace wall_mat = "CON" if wall_mat=="9"
		replace wall_mat = "GMA" if wall_mat=="7"
		replace wall_mat = "GTB" if wall_mat=="1"
		replace wall_mat = "MUB" if wall_mat=="3"
		replace wall_mat = "PLP" if wall_mat=="2"
		replace wall_mat = "STN" if wall_mat=="5"
		replace wall_mat = "STM" if wall_mat=="6"
		replace wall_mat = "WOO" if wall_mat=="4"
		replace wall_mat = "" if length(wall_mat)!=3
		
		replace roof_mat = "OTH" if roof_mat=="0"
		replace roof_mat = "BBK" if roof_mat=="5"
		replace roof_mat = "CON" if roof_mat=="9"
		replace roof_mat = "GMA" if roof_mat=="8"
		replace roof_mat = "GTB" if roof_mat=="1"
		replace roof_mat = "HMT" if roof_mat=="3"
		replace roof_mat = "MMT" if roof_mat=="4"
		replace roof_mat = "PLP" if roof_mat=="2"
		replace roof_mat = "SLA" if roof_mat=="7"
		replace roof_mat = "STO" if roof_mat=="6"
		replace roof_mat = "" if length(roof_mat)!=3

		replace hh_own = "O" if hh_own=="1"
		replace hh_own = "R" if hh_own=="2"
		replace hh_own = "N" if hh_own=="3"
		replace hh_own = "" if inlist(hh_own,"O","R","N")==0
		
		// unlike the other variables, unrecognised pub_priv values become "NA"
		replace pub_priv = "GO" if pub_priv=="1"
		replace pub_priv = "PR" if pub_priv=="2"
		replace pub_priv = "PU" if pub_priv=="3"
		replace pub_priv = "NA" if inlist(pub_priv,"GO","PR","PU")==0
		
		replace main_src_of_hh_inc = "CUL" if main_src_of_hh_inc=="1"
		replace main_src_of_hh_inc = "FOR" if main_src_of_hh_inc=="4"
		replace main_src_of_hh_inc = "LAB" if main_src_of_hh_inc=="2"
		replace main_src_of_hh_inc = "ENT" if main_src_of_hh_inc=="5"
		replace main_src_of_hh_inc = "OTH" if main_src_of_hh_inc=="7"
		replace main_src_of_hh_inc = "DOM" if main_src_of_hh_inc=="3"
		// any value containing "BEG" is also normalised to "BEG"
		replace main_src_of_hh_inc = "BEG" if main_src_of_hh_inc=="6" | regexm(main_src_of_hh_inc,"BEG")==1
		replace main_src_of_hh_inc = "" if length(main_src_of_hh_inc)!=3

		replace own_phone = "M" if own_phone=="2"
		replace own_phone = "L" if own_phone=="1"
		replace own_phone = "N" if own_phone=="4"
		replace own_phone = "B" if own_phone=="3"
		replace own_phone = "" if inlist(own_phone,"M","L","N","B")==0
		
		replace highest = "<5" if highest=="1"
		replace highest = "5-10" if highest=="2"
		// free-text variants mentioning 10 but not 5 are read as ">10";
		// "5-10" (set just above) contains a 5 and is therefore left alone
		replace highest = ">10" if highest=="3" | highest=="10 OR MORE" | (regexm(highest,"10")==1 & regexm(highest,"5")==0)
		replace highest = "" if inlist(highest,"<5","5-10",">10")==0
		
		foreach v in salaried_job own_oprt_ent_reg_wt_gvt own_frig own_motor_veh ///
				 mech_3_4_wheeler_agr_eqp irr_equip own_any_land ///
							 bonded_labor manual_scavenger credit_card  {
			// convert to numeric FIRST: on a variable still stored as a string
			// the numeric comparison `v'==2 is a type mismatch, which capture
			// would hide, leaving the 2s un-recoded after destring
			cap destring `v', replace
			// capture is kept so a missing variable, or one destring could not
			// convert, does not stop the loop
			cap replace `v' = 0 if `v'==2
		}						 

		compress
		save "`f'", replace
	}	
}
}

**************************************************************
**************************************************************
